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DETERMINE A BLOCK WIDTH FROM 
THE NUMBER OF THREADS AND THE 
SIZE OF THE PROBLEM 
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COPY BLOCKS (D AND Li) TO BE 
PROCESSED BY THREADS 
(PROCESSORS) EACH DETERMINING A 
BLOCK TO BE PROCESSED TO A 
WORKING AREA 
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LET EACH THREAD TAKE IN PIVOTS. 

IDENTIFY A LARGEST PIVOT AND 
TRANSPOSE A ROW VECTOR BY USING A 

SHARED AREA. LET EACH THREAD 
PERFORM LU FACTORIZATION ON D + Li. 
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LET THE THREADS UPDATE Ui BY 
USING LL IN PARALLEL 
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LET THE THREADS UPDATE Ci WITH 
A PRODUCT OF Li AND U IN 
PARALLEL 
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DO i = 1, iblks 

TMP=0.0D0; jj=0
DO j = i, leng

IF(ABS(LT(j, i)) .GT. TMP) THEN

TMP=ABS(LT(j, i)) 

jj=j 
ENDIF 
ENDDO 



IF(jj .GT. i) THEN

DO k=1, iblks

TMPX=LT(i, k)

LT(i, k)=LT(jj, k)

LT(jj, k)=TMPX
ENDDO 
END IF 

DO k=i + 1, iblks 

LT(i, k)=LT(i, k)/LT(i, i)
ENDDO 

DO k=i + 1, iblks 
DO l = i + 1, leng

LT(l, k)=LT(l, k)-LT(l, i)*LT(i, k)
ENDDO 
ENDDO 

ENDDO 
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iblks 



lengi 




LTi 



DO i=1, iblks 

TMP=0.0D0; jj=0
DO j = i, lengi

IF(ABS(LTi(j, i)) .GT. TMP) THEN

TMP=ABS(LTi(j, i))

jj = j

ENDIF 
ENDDO 

pivot(#THREAD) = jj
(#THREAD IS A THREAD NUMBER. IN THE
CASE OF PARALLEL PROCESSING BY 4
THREADS, #THREAD IS PRESCRIBED AS
1, 2, 3 AND 4.)
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BARRIER SYNCHRONIZATION 
IF(#THREAD .EQ. 1) THEN

jx=0; GPIVOT=0
DO ix=1, 4

IF(pivot(ix) .GT. jx .AND. pivot(ix) .GT. iblks) GPIVOT = ix
(THE NUMBER OF A THREAD HAVING A LARGEST NUMBER)

ENDDO 
END IF 

BARRIER SYNCHRONIZATION 
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IF(#THREAD, EQ, G PIVOT) THEN 

IF(jj .GT. i) THEN

DO ix=1, iblks

ROW(ix)=LTi(jj, ix)
ENDDO 
END IF 

BARRIER SYNCHRONIZATION
IF(GPIVOT .EQ. 0) THEN
IF(jj .GT. i) THEN
DO ix = 1, iblks
TMPW=LTi(i, ix)
LTi(i, ix)=LTi(jj, ix)
LTi(jj, ix)=TMPW
ENDDO 
END IF 
ELSE 

IF(#THREAD .EQ. GPIVOT) THEN
DO ix = 1, iblks

LTi(jj, ix)=LTi(i, ix)
LTi(i, ix)=ROW(ix)
ENDDO
ELSE

DO ix=1, iblks
LTi(i, ix)=ROW(ix)
ENDDO 
ENDIF 



(7) 



(SINCE TRANSPOSITION HAS
BEEN CARRIED OUT IN AN IP,
THE THREADS EXECUTE THE
PROCESSING IN PARALLEL)
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DO k=i+1, iblks, 

LTi(i, k)=LTi(i, k)/LTi(i, i)
ENDDO 
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DO k=i + 1, iblks 
DO l=i+1, lengi 

LTi(l, k)=LTi(l, k)-LTi(l, i)*LTi(i, k)
ENDDO 
ENDDO 
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ENDDO 
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subroutine LU(LTi, k, iblks, ist. nwid) 

(WHERE LTi IS USED BY THREADS FOR STORING (D + Li),
k IS THE SIZE OF THE FIRST DIMENSION OF LTi,
iblks IS THE BLOCK WIDTH,

ist IS A POSITION TO START THE LU FACTORIZATION AND
nwid IS THE WIDTH OF AN OBJECT SUBJECTED TO THE LU FACTORIZATION)
IF(nwid .EQ. 8) THEN (A WIDTH OF 8 IS A MINIMUM)

LTi(ist:k, ist:ist+nwid-1) IS SUBJECTED TO THE LU FACTORIZATION IN
PARALLEL.

HERE, THE PARTS (4) TO (10) OF FIG. 9 ARE EXECUTED.
IN THIS CASE, THE ROW-TRANSPOSING UNIT TRANSPOSES
LTi(i, 1:iblks) AT THE LENGTH iblks.



call LU(LTi, k, iblks, ist, nwid/2) 
call TRS( ) 

UPDATE LTi(ist:ist+nwid/2-1, ist+nwid/2:ist+nwid) BY USING A
LOWER-TRIANGULAR MATRIX LL OF LTi(ist:ist+nwid/2-1, ist:ist+nwid/2-1).
UPDATE IT BY MULTIPLYING IT BY LL^{-1} FROM THE LEFT.

call MM( ) 

LTi(ist+nwid/2:k, ist+nwid/2:ist+nwid)
= LTi(ist+nwid/2:k, ist+nwid/2:ist+nwid)
- LTi(ist+nwid/2:k, ist:ist+nwid/2-1) *
LTi(ist:ist+nwid/2-1, ist+nwid/2:ist+nwid)

Barrier SYNCHRONIZATION 

call LU(LTi, k, iblks, ist+nwid/2, nwid/2)
end if 
return 

end subroutine 



else 
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subroutine l_TD(l_Ti, k, iblks, ist, nwid) 
IF(nwid .EQ. 8) THEN (THE WIDTH OF 8 IS THE MINIMUM)
DO i=ist, ist+7
DO j = i + 1, ist+7
LTi(i, j)=LTi(j, i)
LTi(j, i)=LTi(j, i)/LTi(i, i)
ENDDO 

DO jy=i + 1, ist+7
DO jx=jy, ist+7

LTi(jx, jy)=LTi(jx, jy)-LTi(jx, i)*LTi(i, jy)
ENDDO 
ENDDO 



UPDATE LTi(ist+8:k, ist:ist+7).
SINCE DL^T IS INCLUDED IN THE UPPER TRIANGLE OF
LTi(ist:ist+7, ist:ist+7), UPDATE IT BY MULTIPLYING IT BY (DL^T)^{-1} FROM THE RIGHT
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ELSE 

call LDL(LTi, k, iblks, ist, nwid/2)
COPY DL^T TO

LTi(ist:ist+nwid/2-1, ist+nwid/2:ist+nwid-1).

(D IS AN OBJECT ELEMENT OF LTi(ist:ist+nwid/2-1, ist:ist+nwid/2-1)
AND L IS

LTi(ist+nwid/2:ist+nwid-1, ist:ist+nwid/2-1),
AND L^T IS THE TRANSPOSE OF THIS L.)



UPDATE LTi(ist+nwid/2:k, ist+nwid/2:ist+nwid-1).

LTi(ist+nwid/2:k, ist+nwid/2:ist+nwid-1)
= LTi(ist+nwid/2:k, ist+nwid/2:ist+nwid-1) -
LTi(ist+nwid/2:k, ist:ist+nwid/2-1) *
LTi(ist:ist+nwid/2-1, ist+nwid/2:ist+nwid-1)

CALL LDL (LTi, k, iblks, ist+nwid/2, nwid/2) 

ENDIF 

RETURN 

END 
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